In [108]:
# !pip install seaborn
import ipywidgets as widgets
from ipywidgets import interact, interact_manual
In [13]:
# ! pip install --upgrade seaborn
Requirement already up-to-date: seaborn in c:\python37\lib\site-packages (0.9.0)
Requirement already satisfied, skipping upgrade: matplotlib>=1.4.3 in c:\python37\lib\site-packages (from seaborn) (3.1.0)
Requirement already satisfied, skipping upgrade: pandas>=0.15.2 in c:\python37\lib\site-packages (from seaborn) (0.24.2)
Requirement already satisfied, skipping upgrade: numpy>=1.9.3 in c:\python37\lib\site-packages (from seaborn) (1.16.4)
Requirement already satisfied, skipping upgrade: scipy>=0.14.0 in c:\python37\lib\site-packages (from seaborn) (1.3.0)
Requirement already satisfied, skipping upgrade: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in c:\python37\lib\site-packages (from matplotlib>=1.4.3->seaborn) (2.4.0)
Requirement already satisfied, skipping upgrade: kiwisolver>=1.0.1 in c:\python37\lib\site-packages (from matplotlib>=1.4.3->seaborn) (1.1.0)
Requirement already satisfied, skipping upgrade: cycler>=0.10 in c:\python37\lib\site-packages (from matplotlib>=1.4.3->seaborn) (0.10.0)
Requirement already satisfied, skipping upgrade: python-dateutil>=2.1 in c:\python37\lib\site-packages (from matplotlib>=1.4.3->seaborn) (2.8.0)
Requirement already satisfied, skipping upgrade: pytz>=2011k in c:\python37\lib\site-packages (from pandas>=0.15.2->seaborn) (2019.1)
Requirement already satisfied, skipping upgrade: setuptools in c:\python37\lib\site-packages (from kiwisolver>=1.0.1->matplotlib>=1.4.3->seaborn) (40.8.0)
Requirement already satisfied, skipping upgrade: six in c:\python37\lib\site-packages (from cycler>=0.10->matplotlib>=1.4.3->seaborn) (1.12.0)
In [3]:
import pandas as pd
In [4]:
wine_data =  pd.read_csv('winequality-white.csv')
In [5]:
wine_data.head()
Out[5]:
fixed acidity volatile acidity citric acid residual sugar chlorides free sulfur dioxide total sulfur dioxide density pH sulphates alcohol quality
0 7.0 0.27 0.36 20.7 0.045 45.0 170.0 1.0010 3.00 0.45 8.8 6
1 6.3 0.30 0.34 1.6 0.049 14.0 132.0 0.9940 3.30 0.49 9.5 6
2 8.1 0.28 0.40 6.9 0.050 30.0 97.0 0.9951 3.26 0.44 10.1 6
3 7.2 0.23 0.32 8.5 0.058 47.0 186.0 0.9956 3.19 0.40 9.9 6
4 7.2 0.23 0.32 8.5 0.058 47.0 186.0 0.9956 3.19 0.40 9.9 6
In [6]:
wine_data.columns
Out[6]:
Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality'],
      dtype='object')
In [7]:
wine_data.tail()
Out[7]:
fixed acidity volatile acidity citric acid residual sugar chlorides free sulfur dioxide total sulfur dioxide density pH sulphates alcohol quality
4893 6.2 0.21 0.29 1.6 0.039 24.0 92.0 0.99114 3.27 0.50 11.2 6
4894 6.6 0.32 0.36 8.0 0.047 57.0 168.0 0.99490 3.15 0.46 9.6 5
4895 6.5 0.24 0.19 1.2 0.041 30.0 111.0 0.99254 2.99 0.46 9.4 6
4896 5.5 0.29 0.30 1.1 0.022 20.0 110.0 0.98869 3.34 0.38 12.8 7
4897 6.0 0.21 0.38 0.8 0.020 22.0 98.0 0.98941 3.26 0.32 11.8 6
In [8]:
len(wine_data)
Out[8]:
4898
In [9]:
wine_data.describe()
Out[9]:
fixed acidity volatile acidity citric acid residual sugar chlorides free sulfur dioxide total sulfur dioxide density pH sulphates alcohol quality
count 4898.000000 4898.000000 4898.000000 4898.000000 4898.000000 4898.000000 4898.000000 4898.000000 4898.000000 4898.000000 4898.000000 4898.000000
mean 6.854788 0.278241 0.334192 6.391415 0.045772 35.308085 138.360657 0.994027 3.188267 0.489847 10.514267 5.877909
std 0.843868 0.100795 0.121020 5.072058 0.021848 17.007137 42.498065 0.002991 0.151001 0.114126 1.230621 0.885639
min 3.800000 0.080000 0.000000 0.600000 0.009000 2.000000 9.000000 0.987110 2.720000 0.220000 8.000000 3.000000
25% 6.300000 0.210000 0.270000 1.700000 0.036000 23.000000 108.000000 0.991723 3.090000 0.410000 9.500000 5.000000
50% 6.800000 0.260000 0.320000 5.200000 0.043000 34.000000 134.000000 0.993740 3.180000 0.470000 10.400000 6.000000
75% 7.300000 0.320000 0.390000 9.900000 0.050000 46.000000 167.000000 0.996100 3.280000 0.550000 11.400000 6.000000
max 14.200000 1.100000 1.660000 65.800000 0.346000 289.000000 440.000000 1.038980 3.820000 1.080000 14.200000 9.000000

distplot

In [14]:
import seaborn as sns
import matplotlib.pyplot as plt
In [15]:
wine_data = pd.read_csv('winequality-white.csv')
wine_data.columns = ['fixed_acidity', 'volatile_acidity', 'citric_acid', 'residual_sugar',
       'chlorides', 'free_sulfur_dioxide', 'total_sulfur_dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality']
In [16]:
wine_data.tail(10)
Out[16]:
fixed_acidity volatile_acidity citric_acid residual_sugar chlorides free_sulfur_dioxide total_sulfur_dioxide density pH sulphates alcohol quality
4888 6.8 0.220 0.36 1.20 0.052 38.0 127.0 0.99330 3.04 0.54 9.2 5
4889 4.9 0.235 0.27 11.75 0.030 34.0 118.0 0.99540 3.07 0.50 9.4 6
4890 6.1 0.340 0.29 2.20 0.036 25.0 100.0 0.98938 3.06 0.44 11.8 6
4891 5.7 0.210 0.32 0.90 0.038 38.0 121.0 0.99074 3.24 0.46 10.6 6
4892 6.5 0.230 0.38 1.30 0.032 29.0 112.0 0.99298 3.29 0.54 9.7 5
4893 6.2 0.210 0.29 1.60 0.039 24.0 92.0 0.99114 3.27 0.50 11.2 6
4894 6.6 0.320 0.36 8.00 0.047 57.0 168.0 0.99490 3.15 0.46 9.6 5
4895 6.5 0.240 0.19 1.20 0.041 30.0 111.0 0.99254 2.99 0.46 9.4 6
4896 5.5 0.290 0.30 1.10 0.022 20.0 110.0 0.98869 3.34 0.38 12.8 7
4897 6.0 0.210 0.38 0.80 0.020 22.0 98.0 0.98941 3.26 0.32 11.8 6
In [17]:
plt.hist(wine_data.alcohol)
Out[17]:
(array([ 37., 808., 969., 761., 765., 625., 427., 368., 110.,  28.]),
 array([ 8.  ,  8.62,  9.24,  9.86, 10.48, 11.1 , 11.72, 12.34, 12.96,
        13.58, 14.2 ]),
 <a list of 10 Patch objects>)
In [18]:
sns.distplot(wine_data.alcohol, kde=True)  # histogram bars KDE:gaussian kernel density estimate.
Out[18]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c46e366c50>

around 40% have alcohol content = 9.3

In [19]:
f,ax = plt.subplots(figsize = (15,5))
sns.distplot(wine_data.alcohol, kde=True)
Out[19]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c46e4389e8>
In [20]:
f,ax = plt.subplots(figsize = (15,5))
sns.distplot(wine_data.alcohol, kde=False, rug=True , bins = 200) 
Out[20]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c46e682978>

rug: Whether to draw a rugplot on the support axis.

bins control granularity of the bars , bins = more size -> you can analyse the data more deep

In [21]:
f,ax = plt.subplots(figsize = (15,5))
sns.rugplot(wine_data.alcohol, height = 0.75) # most of the rug distribution is clustered around centre 
Out[21]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c46e4da828>
In [22]:
f,ax = plt.subplots(figsize = (15,5))
sns.distplot(wine_data.alcohol, kde=True, rug=True, hist = False)
Out[22]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c46f0885c0>

shade the Kernal density estimation area

In [23]:
f,ax = plt.subplots(figsize = (15,5))
sns.set(color_codes=True)
sns.kdeplot(wine_data.alcohol, shade=True, color ='r')
Out[23]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c46f491ba8>

plotting KDE with various bandwidth that fits the original data

In [24]:
f,ax = plt.subplots(figsize=(15,5))
sns.kdeplot(wine_data.alcohol)
sns.kdeplot(wine_data.alcohol, bw = 0.04 , label = 'bw =0.04 ' )
sns.kdeplot(wine_data.alcohol, bw = 0.2 , label = 'bw =0.2 ' )
sns.kdeplot(wine_data.alcohol, bw = 2 , label = 'bw =2 ' )
sns.kdeplot(wine_data.alcohol, bw = 5 , label = 'bw =5 ' )
Out[24]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c46f329048>

the more erratic the plot is the less generic information it provides

Joint Plots along with univariate distribution

In [25]:
sns.jointplot(x = wine_data.free_sulfur_dioxide, y= wine_data.total_sulfur_dioxide, xlim={0,150}, ylim={0,400}, alpha=0.2) # ALPHA STILL WORKS HERE
Out[25]:
<seaborn.axisgrid.JointGrid at 0x1c46f3940b8>

Hexbin Plots - Histogram representation of bivariate plot

the problem here in jointplot is at the middle we can not decide the relationship between wine_data.free_sulfur_dioxide wine_data.total_sulfur_dioxide ; solution to this is plotting Hexbin plot with Hue variations

how dark and how light a perticular point is represents the height of histogram at intersection

In [26]:
sns.jointplot(x = wine_data.free_sulfur_dioxide, y= wine_data.total_sulfur_dioxide, kind='hex', xlim={0,100}, ylim={0,200})
Out[26]:
<seaborn.axisgrid.JointGrid at 0x1c46f761208>

KDE curves for bivariate distribution

In [27]:
sns.jointplot(x = wine_data.free_sulfur_dioxide, y= wine_data.total_sulfur_dioxide, kind='kde', xlim={0,80}, ylim={0,300})
Out[27]:
<seaborn.axisgrid.JointGrid at 0x1c46fb33860>
In [28]:
f,ax = plt.subplots(figsize = (8,5))
sns.kdeplot(wine_data.free_sulfur_dioxide, wine_data.total_sulfur_dioxide)
sns.rugplot(wine_data.free_sulfur_dioxide, color = 'g',height=0.05)
sns.rugplot(wine_data.total_sulfur_dioxide, color = 'b', height=0.05, vertical = True)
plt.xlim(-20,100)
plt.ylim(-50,400)
Out[28]:
(-50, 400)

plot bivariate relationships between every pair of columns in a dataset

In [29]:
# sns.pairplot(wine_data, height=3) # rn this it takes a lot of time 
In [30]:
sns.pairplot(wine_data, height=3, vars=['fixed_acidity','chlorides','sulphates', 'alcohol', 'quality'], diag_kind='kde')
Out[30]:
<seaborn.axisgrid.PairGrid at 0x1c4701c9358>

pairwise regressions

In [31]:
sns.pairplot(wine_data, height=3, vars=['chlorides','sulphates', 'quality'], kind='reg') 
Out[31]:
<seaborn.axisgrid.PairGrid at 0x1c471f873c8>
In [32]:
g= sns.PairGrid(wine_data, height=3, vars=['chlorides','sulphates', 'quality']) 
g.map(plt.scatter)
Out[32]:
<seaborn.axisgrid.PairGrid at 0x1c472947940>

dont you think it is stupid to plot a univariate varaible against itself... ok lets do one thing let's plot KDE for diagonal and scatter for non diagonal plots

In [33]:
g= sns.PairGrid(wine_data,vars=['chlorides','sulphates', 'alcohol'], hue='quality')
g.map_offdiag(plt.scatter)
g.map_diag(sns.kdeplot)
plt.legend(loc=2)
Out[33]:
<matplotlib.legend.Legend at 0x1c472f260f0>

Now how about tuning PairGrid even further map_upper map_lower map_diag

In [34]:
g=sns.PairGrid(wine_data,vars=['chlorides','sulphates', 'alcohol'])

g.map_diag(sns.kdeplot)
g.map_upper(sns.scatterplot)
g.map_lower(sns.regplot)
Out[34]:
<seaborn.axisgrid.PairGrid at 0x1c4735f97b8>

Howabout using PairGrid with x_vars and y_vars differet

In [35]:
g=sns.PairGrid(wine_data,x_vars=['chlorides','sulphates', 'fixed_acidity'], y_vars=['alcohol'])
g.map(plt.scatter)

# g.map_diag(sns.kdeplot)
# g.map_upper(sns.scatterplot)
# g.map_lower(sns.regplot)
Out[35]:
<seaborn.axisgrid.PairGrid at 0x1c474c49240>
In [36]:
g=sns.PairGrid(wine_data,x_vars=['chlorides','sulphates', 'fixed_acidity'], y_vars=['alcohol'])
g.map(sns.scatterplot) # sns looks more beautiful
Out[36]:
<seaborn.axisgrid.PairGrid at 0x1c474e0d518>
In [37]:
# sns.pairplot(wine_data, height=3, vars=['fixed_acidity','chlorides','sulphates', 'quality'], hue = 'pH') 

Heatmaps

In [38]:
corrmat = wine_data.corr()
f,ax = plt.subplots(figsize=(10,10))
# sns.heatmap(corrmat, vmin = -0.9, vmax=0.95, square = True, annot= True, fmt='.2f', cmap='summer' )
sns.heatmap(corrmat, annot=True, fmt='0.2f', square =  True)
Out[38]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c474ecbd30>

lmplots and regplots for regression

In [39]:
sns.lmplot(x='residual_sugar', y='density', data = wine_data, height = 7, aspect=2)
Out[39]:
<seaborn.axisgrid.FacetGrid at 0x1c4752f71d0>

when MSE is low CI is narrow and whereever the MSE is high CI-confidence Interval is High

categorical variable with discrete values

In [40]:
sns.lmplot(x='quality', y='alcohol', data=wine_data)
Out[40]:
<seaborn.axisgrid.FacetGrid at 0x1c47534b9b0>

hard to see individual data points so add some jittering - regression line is not affected by jittering

In [41]:
sns.lmplot(x='quality', y='alcohol', data=wine_data, x_jitter=.2)
Out[41]:
<seaborn.axisgrid.FacetGrid at 0x1c4757458d0>
In [42]:
import numpy as np
sns.lmplot(x='quality', y='alcohol', data=wine_data, x_estimator=np.mean )
Out[42]:
<seaborn.axisgrid.FacetGrid at 0x1c4757cb240>

Higher quality means higher alcohol content

In [43]:
# sns.lmplot(x='pH',y='fixed_acidity', data= wine_data, row='quality', hue='alcohol') # Fixed_acidity vs pH in each quality category with hue= alcohol : needs tuning
In [44]:
# sns.lmplot(x='pH',y='fixed_acidity', data= wine_data, col='quality', hue='alcohol') # Fixed_acidity vs pH in each quality category with hue= alcohol

regplot

regplot is similar to lmplot but accepts inputs in various forms such as numpy arrays pandas series dataframe variable references

regplot is axis level v/s lmplot is figure level

lmplot is more powerful and operates at higher level at matplotlib

In [45]:
sns.regplot(wine_data.alcohol, wine_data.density, color ='0')
Out[45]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c4758022e8>
In [46]:
f, ax = plt.subplots(figsize=(12,5))
sns.regplot(x=wine_data.residual_sugar, y=wine_data.density, ax=ax) # controlling size and shape
Out[46]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c475817a58>

joint plots also plots regression if kind is specified as 'reg'

In [47]:
sns.jointplot(x = wine_data.free_sulfur_dioxide, y= wine_data.total_sulfur_dioxide, xlim={0,150}, ylim={0,400}, kind='reg')
Out[47]:
<seaborn.axisgrid.JointGrid at 0x1c475a4fba8>
In [48]:
sns.pairplot(wine_data,x_vars=['fixed_acidity', 'citric_acid', 'chlorides'], y_vars=['alcohol'], kind='reg', height=8, aspect=1)
Out[48]:
<seaborn.axisgrid.PairGrid at 0x1c476add588>

CATEGORICAL AND MULTIPANEL DATA

In [49]:
f,ax=plt.subplots(figsize=(15,5))
sns.stripplot(x='quality',y='alcohol',data= wine_data)
Out[49]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c476fde898>
In [50]:
f,ax=plt.subplots(figsize=(15,5))
sns.swarmplot(x='quality',y='alcohol',data= wine_data, hue='pH')
Out[50]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c4773ff2b0>
In [51]:
sns.dogplot()

BOX PLOT

In [52]:
f,ax = plt.subplots(figsize=(15,4))
sns.boxplot(x='quality', y='alcohol', data=wine_data)
Out[52]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c47781c550>

wiskers are mostly 1.5 times inter quantile ; any data outside this range is represented as outlier seperately

VIOLIN PLOT mit distribution probability

In [53]:
f,ax = plt.subplots(figsize=(15,4))
sns.violinplot(x='quality', y='alcohol', data=wine_data) 
Out[53]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c477999160>

The curved boundaries represents KDE , violin plot also shows mode ; violin plots can also be plotted relative to counts of individual data

In [54]:
f,ax = plt.subplots(figsize=(15,4))
sns.violinplot(x='quality', y='alcohol', data=wine_data, scale='count') 
Out[54]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c47807c518>

here you can scale the width with respect to number of counts of each categorical observation

In [55]:
f,ax = plt.subplots(figsize=(15,4))
sns.violinplot(x='quality', y='alcohol', data=wine_data, scale='count', inner='stick') 
Out[55]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c46fc88ef0>

scaled thickly closed lines suggest there are many samples in our data with quality 6 and at alcohol level 11.5

COMBINING SWARM AND VIOLIN PLOTS

In [56]:
f,ax = plt.subplots(figsize=(15,4))
sns.violinplot(x='quality', y='alcohol', data=wine_data) 
sns.swarmplot(x='quality',y='alcohol',data= wine_data)
Out[56]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c47c6e16d8>

STATISTICAL ESTIMATION

In [57]:
f, ax= plt.subplots(figsize=(15,5))
sns.barplot(x='quality', y='pH', data=wine_data)
Out[57]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c47c6d9780>
In [58]:
f, ax= plt.subplots(figsize=(15,5))
sns.countplot(x='alcohol', data=wine_data)
Out[58]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c47bc53ef0>
In [59]:
f, ax= plt.subplots(figsize=(20,5))
sns.countplot(x='alcohol', data=wine_data, color = 'm', palette='Greens_d')
plt.xticks(rotation=90)
Out[59]:
(array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
         13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
         26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
         39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
         52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
         65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
         78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
         91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102]),
 <a list of 103 Text xticklabel objects>)

Number of wines at every alcohollevel

POINT PLOT

A point plot represents an estimate of central tendency for a numeric variable by the position of scatter plot points and provides some indication of the uncertainty around that estimate using error bars.

In [60]:
f, ax= plt.subplots(figsize=(20,5))
sns.pointplot(x='quality', y='pH', data=wine_data)
Out[60]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c47d7ebcc0>
In [62]:
# f, ax= plt.subplots(figsize=(28,5))
# sns.pointplot(x='quality', y='pH', data=wine_data, hue='alcohol') # hue here helps us to visualise the variation of the 

Observe how the alcohol content varies with the ph and quality of wine

BOX PLOT

In [63]:
f,ax = plt.subplots(figsize=(15,8))
sns.boxplot(data= wine_data, orient='h')
Out[63]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c47806a748>

Seaborn is a powerful visualization library built on top of matplotlib ; tightly integrated with PyData stack and makes production ready plots

Facet grids in Seaborn

In [65]:
titanic = pd.read_csv('titanic.csv')
In [66]:
titanic.head()
Out[66]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S
In [67]:
g = sns.FacetGrid(titanic, col='Pclass')
In [68]:
g = sns.FacetGrid(titanic, col='Survived')
g.map(plt.hist,'Pclass', color='k')
Out[68]:
<seaborn.axisgrid.FacetGrid at 0x1c47e6b2d68>

Plotting bivariate data

In [69]:
g = sns.FacetGrid(titanic, col='Survived', height=8)
g.map(plt.scatter,'Age','Fare')
Out[69]:
<seaborn.axisgrid.FacetGrid at 0x1c47e6a1f60>
In [70]:
g = sns.FacetGrid(titanic, col='Survived', height=8, hue='Sex')
g.map(plt.scatter,'Age','Fare', alpha= 0.99) # alpha is the opacity
g.add_legend()
Out[70]:
<seaborn.axisgrid.FacetGrid at 0x1c47ead3160>
In [71]:
g = sns.FacetGrid(titanic, col='Survived', height=8, hue='Sex')
g.map(plt.bar,'Age','Fare', alpha= 0.99) # alpha is the opacity
g.add_legend()
Out[71]:
<seaborn.axisgrid.FacetGrid at 0x1c47f0c8a58>
In [72]:
g = sns.FacetGrid(titanic, col='Survived', row = 'Pclass', height=8, hue='Sex')
g.map(plt.bar,'Age','Fare', alpha= 0.99) # alpha is the opacity
g.add_legend()
Out[72]:
<seaborn.axisgrid.FacetGrid at 0x1c47fddad68>
In [73]:
g = sns.FacetGrid(titanic, col='Survived', row = 'Pclass', height=8, hue='Sex')
g.map(sns.barplot,'Age','Fare')
c:\python37\lib\site-packages\seaborn\axisgrid.py:715: UserWarning: Using the barplot function without specifying `order` is likely to produce an incorrect plot.
  warnings.warn(warning)
Out[73]:
<seaborn.axisgrid.FacetGrid at 0x1c4002a9550>
In [74]:
# g = sns.FacetGrid(titanic, col='Survived', row = 'Pclass', height=8, hue='Sex')
# g.map(sns.regplot,'Age','Fare', fit_reg=False) # alpha is the opacity
# g.add_legend()
In [75]:
g = sns.FacetGrid(titanic, col='Survived', row = 'Pclass', height=8, hue='Sex')
g.map(plt.scatter,'Age','Fare', alpha= 0.99) # alpha is the opacity
g.add_legend()
Out[75]:
<seaborn.axisgrid.FacetGrid at 0x1c4029dd0b8>
In [76]:
# How about changing some colors
h={"male":'b',"female":'r'}
g = sns.FacetGrid(titanic, col='Survived', row = 'Pclass', height=8, hue='Sex', palette=h)
g.map(plt.scatter,'Age','Fare', alpha= 0.99) # alpha is the opacity
g.add_legend()
Out[76]:
<seaborn.axisgrid.FacetGrid at 0x1c40150ccf8>

NOW HOW ABOUT SOME FINE TUNIG!!!

In [77]:
g = sns.FacetGrid(titanic, col='Survived', height=8, col_wrap = 4)
g.map(sns.barplot,'Age','Fare')
Out[77]:
<seaborn.axisgrid.FacetGrid at 0x1c401791d30>
In [78]:
g = sns.FacetGrid(titanic, col='Survived', row = 'Pclass', height=8, hue='Sex')
g.map(sns.barplot,'Age','Fare', color='#334488', edgecolor ='red',lw=.5 ) # color is a hex parameter
plt.xticks(rotation=90)
g.fig.subplots_adjust(wspace = 0.3, hspace=0.5) # giving some white space and horizontal spacing 

Changing labels

In [79]:
g = sns.FacetGrid(titanic, col='Survived', row = 'Pclass', height=8)
g.map(sns.barplot,'Age','Fare', color='#334488', edgecolor ='red',lw=.5 ) # color is a hex parameter
plt.xticks(rotation=90)
g.fig.subplots_adjust(wspace = 0.3, hspace=0.5) # giving some white space and horizontal spacing 
g.set_axis_labels('Age of Passengers','Fare of each Passenger')  
Out[79]:
<seaborn.axisgrid.FacetGrid at 0x1c4077d3080>

is this bar graph showing frequency of fares? no !! it is just giving one farevalue for each bucket of age; work on it later now!

Customizing the yticks

In [80]:
g = sns.FacetGrid(titanic, col='Survived', height=8, hue='Sex')
g.map(sns.scatterplot,'Age','Fare') # color is a hex parameter
g.fig.subplots_adjust(wspace = 0.3, hspace=0.5) # giving some white space and horizontal spacing 
g.set_axis_labels('Age of Passengers','Fare of each Passenger')  
g.set(yticks=[0,50,100,150,200,250,300,350,400,450,500])
Out[80]:
<seaborn.axisgrid.FacetGrid at 0x1c409a8c748>

xlim and ylim - limit your x and y ticks

In [81]:
g = sns.FacetGrid(titanic, col='Survived', height=5, hue='Sex', xlim=(0,16), ylim=(100,500))
g.map(sns.scatterplot,'Age','Fare') # color is a hex parameter
g.fig.subplots_adjust(wspace = 0.3, hspace=0.5) # giving some white space and horizontal spacing 
g.set_axis_labels('Age of Passengers','Fare of each Passenger')  
Out[81]:
<seaborn.axisgrid.FacetGrid at 0x1c40855c6a0>

this is function inbulit argument method to restrict our x and y ticks

In [82]:
g = sns.FacetGrid(titanic, col='Survived', height=5, hue='Sex')
g.map(sns.scatterplot,'Age','Fare') # color is a hex parameter
g.fig.subplots_adjust(wspace = 0.3, hspace=0.5) # giving some white space and horizontal spacing 
g.set_axis_labels('Age of Passengers','Fare of each Passenger')  
g.set(xlim=(0,16), ylim=(100,500))
Out[82]:
<seaborn.axisgrid.FacetGrid at 0x1c40886e320>

Machine learning bicycle data set

In [83]:
bike = pd.read_csv('bike_sharing_daily.csv')
In [84]:
bike.head()
Out[84]:
instant dteday season yr mnth holiday weekday workingday weathersit temp atemp hum windspeed casual registered cnt
0 1 2011-01-01 1 0 1 0 6 0 2 0.344167 0.363625 0.805833 0.160446 331 654 985
1 2 2011-01-02 1 0 1 0 0 0 2 0.363478 0.353739 0.696087 0.248539 131 670 801
2 3 2011-01-03 1 0 1 0 1 1 1 0.196364 0.189405 0.437273 0.248309 120 1229 1349
3 4 2011-01-04 1 0 1 0 2 1 1 0.200000 0.212122 0.590435 0.160296 108 1454 1562
4 5 2011-01-05 1 0 1 0 3 1 1 0.226957 0.229270 0.436957 0.186900 82 1518 1600
In [85]:
bike.columns
Out[85]:
Index(['instant', 'dteday', 'season', 'yr', 'mnth', 'holiday', 'weekday',
       'workingday', 'weathersit', 'temp', 'atemp', 'hum', 'windspeed',
       'casual', 'registered', 'cnt'],
      dtype='object')
In [86]:
bike.columns=['instant', 'date', 'season', 'year', 'month', 'holiday', 'weekday',
       'workingday', 'weathersituation', 'temp', 'atemp', 'humidity', 'windspeed',
       'casual_users', 'registered_users', 'cnt']
In [87]:
# bike.cnt.head(200) bike count per day may go above 8000 per day and some days as low as 100 bikes per day
In [88]:
f,ax = plt.subplots(figsize=(15,5))
sns.set(style="whitegrid")
# sns.set_xticklabels=([0,500,1000,1500,2000,2500,3000,3500,4000,4500,5000,5500,6000,6500,7000,7500,8000,8500,9000])
sns.distplot(bike.cnt, bins=120, kde=False, rug=True, color='k')
Out[88]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c40894a978>
In [89]:
f,ax = plt.subplots(figsize=(15,5))
sns.set(style="whitegrid")
# sns.set_xticklabels=([0,500,1000,1500,2000,2500,3000,3500,4000,4500,5000,5500,6000,6500,7000,7500,8000,8500,9000])
sns.distplot(bike.cnt, bins=120, kde=True, rug=True, color='k')
Out[89]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c408a06390>
In [90]:
sns.jointplot(bike.instant,bike.cnt, height=8, color='g') # number of bikes hired as a function of day passed 
Out[90]:
<seaborn.axisgrid.JointGrid at 0x1c40a7cdcf8>

two peaks in two years

In [91]:
sns.jointplot(bike.month,bike.cnt, height=8, color='g')
Out[91]:
<seaborn.axisgrid.JointGrid at 0x1c40a8d6828>

monthwise counts; as seen in months 8,9,10 more bicycles are rented

season wise rental data

In [92]:
sns.jointplot(bike.season,bike.cnt) # counts are higher in season 3 and 4
Out[92]:
<seaborn.axisgrid.JointGrid at 0x1c40af6f0b8>
In [93]:
sns.pairplot(bike,height=6, aspect=1.2,x_vars=['temp', 'humidity', 'windspeed'], y_vars='cnt', hue='season')
Out[93]:
<seaborn.axisgrid.PairGrid at 0x1c40b1f0198>

Themes and Styles

In [95]:
# sns.set_style('darkgrid') # applies to all lines ; affects all plots that follow
# f,ax = plt.subplots(figsize=(15,5))
# sns.scatterplot(bike.cnt)
In [96]:
with sns.axes_style('darkgrid'): # applies to only this lines 
        f,ax = plt.subplots(figsize=(15,5))
        sns.distplot(bike.cnt)

How to remove spines

In [97]:
sns.jointplot(bike.instant,bike.cnt, height=8, color='g')
sns.despine(offset=15, trim=True)
In [98]:
sns.set()
In [99]:
sns.jointplot(bike.instant,bike.cnt, height=8, color='g')
Out[99]:
<seaborn.axisgrid.JointGrid at 0x1c40b736dd8>

colors

In [100]:
current_palette = sns.color_palette()
sns.palplot(current_palette)
In [101]:
sns.palplot(sns.color_palette('hls',8)) # hls-> Hue Lightness and Saturation
In [102]:
sns.palplot(sns.hls_palette(n_colors=15, h=.99, l=0.5, s=.99))
In [105]:
sns.choose_colorbrewer_palette()
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-105-fb23dbe0b0e1> in <module>
----> 1 sns.choose_colorbrewer_palette()

TypeError: choose_colorbrewer_palette() missing 1 required positional argument: 'data_type'
In [109]:
# !pip install ipywidgets
In [111]:
sns.choose_colorbrewer_palette('diverging')
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-111-cddbe02a2b3e> in <module>
----> 1 sns.choose_colorbrewer_palette('diverging')

c:\python37\lib\site-packages\seaborn\widgets.py in choose_colorbrewer_palette(data_type, as_cmap)
    127         variants = ["regular", "reverse"]
    128 
--> 129         @interact
    130         def choose_diverging(name=opts, n=(2, 16),
    131                              desat=FloatSlider(min=0, max=1, value=1),

NameError: name 'interact' is not defined
In [112]:
# sequential color plots ; 
sns.palplot(sns.color_palette('Blues'))
In [113]:
# sequential color plots ; 
sns.palplot(sns.color_palette('Blues_d'))
In [114]:
sns.choose_colorbrewer_palette('sequential') 
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-114-48b7b01765fd> in <module>
----> 1 sns.choose_colorbrewer_palette('sequential')

c:\python37\lib\site-packages\seaborn\widgets.py in choose_colorbrewer_palette(data_type, as_cmap)
    105         variants = ["regular", "reverse", "dark"]
    106 
--> 107         @interact
    108         def choose_sequential(name=opts, n=(2, 18),
    109                               desat=FloatSlider(min=0, max=1, value=1),

NameError: name 'interact' is not defined
In [115]:
sns.palplot(sns.color_palette('cubehelix',12))
In [116]:
sns.palplot(sns.cubehelix_palette(12)) # good for printing 

Diverging color palette

In [117]:
h = sns.choose_colorbrewer_palette('diverging')
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-117-e6dcdfb0e222> in <module>
----> 1 h = sns.choose_colorbrewer_palette('diverging')

c:\python37\lib\site-packages\seaborn\widgets.py in choose_colorbrewer_palette(data_type, as_cmap)
    127         variants = ["regular", "reverse"]
    128 
--> 129         @interact
    130         def choose_diverging(name=opts, n=(2, 16),
    131                              desat=FloatSlider(min=0, max=1, value=1),

NameError: name 'interact' is not defined
In [118]:
h = sns.choose_colorbrewer_palette('sequential')
g=sns.PairGrid(wine_data,x_vars=['chlorides','sulphates', 'fixed_acidity'], y_vars=['alcohol'], hue = 'pH',palette=h, height= 8)
g.map(sns.scatterplot) # sns looks more beautiful
# g.add_legend()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-118-638d6b1045d7> in <module>
----> 1 h = sns.choose_colorbrewer_palette('sequential')
      2 g=sns.PairGrid(wine_data,x_vars=['chlorides','sulphates', 'fixed_acidity'], y_vars=['alcohol'], hue = 'pH',palette=h, height= 8)
      3 g.map(sns.scatterplot) # sns looks more beautiful
      4 # g.add_legend()

c:\python37\lib\site-packages\seaborn\widgets.py in choose_colorbrewer_palette(data_type, as_cmap)
    105         variants = ["regular", "reverse", "dark"]
    106 
--> 107         @interact
    108         def choose_sequential(name=opts, n=(2, 18),
    109                               desat=FloatSlider(min=0, max=1, value=1),

NameError: name 'interact' is not defined

Aesthetics

In [119]:
sns.axes_style() # current style 
Out[119]:
{'axes.facecolor': '#EAEAF2',
 'axes.edgecolor': 'white',
 'axes.grid': True,
 'axes.axisbelow': True,
 'axes.labelcolor': '.15',
 'figure.facecolor': 'white',
 'grid.color': 'white',
 'grid.linestyle': '-',
 'text.color': '.15',
 'xtick.color': '.15',
 'ytick.color': '.15',
 'xtick.direction': 'out',
 'ytick.direction': 'out',
 'lines.solid_capstyle': 'round',
 'patch.edgecolor': 'w',
 'image.cmap': 'rocket',
 'font.family': ['sans-serif'],
 'font.sans-serif': ['Arial',
  'DejaVu Sans',
  'Liberation Sans',
  'Bitstream Vera Sans',
  'sans-serif'],
 'patch.force_edgecolor': True,
 'xtick.bottom': False,
 'xtick.top': False,
 'ytick.left': False,
 'ytick.right': False,
 'axes.spines.left': True,
 'axes.spines.bottom': True,
 'axes.spines.right': True,
 'axes.spines.top': True}
In [120]:
sns.set_style('ticks',{'xtick.major.size':8,'xtick.color': '.15','ytick.color': '.85','ytick.major.size':10,'axes.facecolor': 'm'})

sns.jointplot(bike.instant,bike.cnt, height=8, color='k')
Out[120]:
<seaborn.axisgrid.JointGrid at 0x1c40d4fc780>
In [121]:
sns.set() # reset everything
In [122]:
sns.jointplot(bike.instant,bike.cnt, height=8, color='k')
Out[122]:
<seaborn.axisgrid.JointGrid at 0x1c40d789d68>

printable and seminar contexts

In [123]:
sns.set_context('talk')
sns.jointplot(bike.instant,bike.cnt, height=8, color='k')
Out[123]:
<seaborn.axisgrid.JointGrid at 0x1c40db5c7f0>
In [124]:
sns.set_context('paper')
sns.jointplot(bike.instant,bike.cnt, height=8, color='k')
Out[124]:
<seaborn.axisgrid.JointGrid at 0x1c40df27400>
In [125]:
sns.set_context('notebook')
sns.jointplot(bike.instant,bike.cnt, height=8, color='k')
Out[125]:
<seaborn.axisgrid.JointGrid at 0x1c40e2b5630>
In [126]:
sns.set_context('poster')
sns.jointplot(bike.instant,bike.cnt, height=8, color='k')
Out[126]:
<seaborn.axisgrid.JointGrid at 0x1c40e6a8b70>

continued!!!!!

In [ ]: